
# DYNAMIC TREE CUT --------------------------------------------------------

#load packages
library(dynamicTreeCut)
library(fastcluster)

#import features table and subset for variables of interest
features <- read.table(file = "features_for_clustering.txt", header = TRUE)
features <- features[, 3:12]

#scale and center
features <- scale(features)

#clustering and dynamic tree cut
clusters <- hclust(dist(features), method = "average")
dist <- as.matrix(dist(features))
treecut <- cutreeHybrid(clusters, dist, maxAbsCoreScatter = 1, minAbsGap = 0.5, cutHeight = 3, minClusterSize = 5)

#clear workspace
rm(list = ls())

# BAYESIAN LOGISTIC REGRESSION --------------------------------------------

#load packages
library(rstanarm)

#import data
features <- read.csv("data.csv")
features <- features[, 2:length(colnames(features))]
songs_with_seconds <- read.csv("songs_with_seconds.csv")

#reformat data for analysis
s <- c() #sex
nuss <- c() #number of unique syllables
bw <- c() #bandwidth
af <- c() #average freqency
c <- c() #concavity
e <- c() #excursion
ls <- c() #length of song

list_of_names <- c("Aceros", "Botaurus", "Corvus", "Cotinga", "Eubucco", "Fluvicola", "Guttera", "Halcyon", "Heterocircus", "Ixoreus", "Lessonia", "Mimizuku", "Orthonyx", "Podager", "Rowettia", "Seleludis", "Thraupis", "Tockus", "Upupa")
i <- 1

while(i <= length(list_of_names)){
  
  sub_a <- features[grep(list_of_names[i], features$song_id), ]
  sub_a_names <- unique(sub_a$song_id)
  
  sub_a_nuss <- c()
  sub_a_bw <- c()
  sub_a_af <- c()
  sub_a_c <- c()
  sub_a_e <- c()
  sub_a_ls <- c()
  
  k <- 1
  
  while(k <= length(sub_a_names)){
    
    sub_b <- sub_a[grep(sub_a_names[k], sub_a$song_id), ]
    
    sub_b_nuss <- length(unique(sub_b$cluster))
    sub_b_bw <- max(sub_b$highest_freq) - min(sub_b$lowest_freq)
    sub_b_af <- mean(sub_b$avg_freq)
    sub_b_c <- sum(sub_b$concavity)/sum(sub_b$duration)
    sub_b_e <- sum(sub_b$excursion)/sum(sub_b$duration)
    sub_b_ls <- songs_with_seconds[grep(sub_a_names[k], songs_with_seconds$song_id), ]$song_length_sec
    
    sub_a_nuss <- c(sub_a_nuss, sub_b_nuss)
    sub_a_bw <- c(sub_a_bw, sub_b_bw)
    sub_a_af <- c(sub_a_af, sub_b_af)
    sub_a_c <- c(sub_a_c, sub_b_c)
    sub_a_e <- c(sub_a_e, sub_b_e)
    sub_a_ls <- c(sub_a_ls, sub_b_ls)
    
    k <- k + 1
  }
  
  s <- c(s, unique(as.character(sub_b$sex)))
  nuss <- c(nuss, mean(sub_a_nuss))
  bw <- c(bw, mean(sub_a_bw))
  af <- c(af, mean(sub_a_af))
  c <- c(c, mean(sub_a_c))
  e <- c(e, mean(sub_a_e))
  ls <- c(ls, mean(sub_a_ls))
  
  i <- i + 1
}

s[which(s == "M")] <- 0
s[which(s == "F")] <- 1
s <- as.numeric(s)

glm_data <- data.frame(s, nuss, bw, af, c, e, ls)

#bayesian logistic regression
bayes_logist <- stan_glm(s ~ nuss + bw + af + c + e + ls, data = glm_data, family = binomial(link = "logit"), iter = 10000, seed = 123)
